################
# FIGURE 2
################
library(rstan)
library(stargazer)
library(data.table)
library(countrycode)
library(Amelia)
load("migration_model_data.RData")
source("create_bias_measures.R")
set.seed(42)
vars <- c("log_flow", "log_stock", "distw", "contig", "colony", "comlang_off",
          "pop_d", "log_gdp_dif", "urate_d", "comcur", "lib_dem_o", "lib_dem_d",
          "year_ed_15_plus_o", "iso3_o", "iso3_d", "year", "region_o", 
          "region_d", "gdpcap_o", "gdpcap_d", "cw_o", "dyad")
d <- bilateral_migration_data[, vars, with = FALSE]
d$log_dist_w <- log(d$distw)
d$log_pop_d <- log(d$pop_d)
d$log_urate_d <- log(d$urate_d)
d$log_gdpcap_o <- log(d$gdpcap_o)
d$log_gdpcap_d <- log(d$gdpcap_d)
d$log_gdpcap_o2 <- d$log_gdpcap_o^2
d$log_gdpcap_d2 <- d$log_gdpcap_d^2
d$year <- as.numeric(d$year)
ref_table <- d[, .(dyad, year, log_flow)]
d[, c("region_o", "region_d",
      "pop_d", "urate_d", "distw", "gdpcap_o", "gdpcap_d", "dyad") := NULL]

 
imp_amelia <- amelia(x = d, m = 1, 
  idvars = c("iso3_o", "iso3_d"), ts = "year")

year <- as.numeric(imp_amelia$imputations$imp1$year)
iso3o <- as.numeric(as.factor(imp_amelia$imputations$imp1$iso3_o))
iso3d <- as.numeric(as.factor(imp_amelia$imputations$imp1$iso3_d))
ys <- as.numeric(imp_amelia$imputations$imp1$log_flow)
X <- as.matrix(imp_amelia$imputations$imp1)
X <- X[, -c(1, 11:13)]
for(k in 1:ncol(X)){
    X[, k] <- arm::rescale(X[, k], binary.inputs = "0/1") 
  }
X <- cbind(1, X)
X <- apply(X, 2, as.numeric)

dl1 <- list(
  K = ncol(X),
  N = nrow(X),
  y_obs = ys,
  X = X,
  N_d = max(iso3d),
  N_o = max(iso3o),
  i_d = iso3d,
  i_o = iso3o,
  N_year = max(year),
  i_year = year
)
rm(imp_amelia)
rm(d)
rm(bilateral_migration_data)



sm <- stan_model("weibull_hurdle_6.stan")
sf <- vb(sm, data = dl1, output_samples = 1000)
ex <- extract(sf)
resids <- ex$resids
rm(sm, sf, ex, X, dl1, iso3d, iso3o, year, ys)
bias <- apply(resids, 2, median)
rm(resids)
ref_table$bias <- bias
rm(bias)
ref_table$flow <- ref_table$log_flow
bias_dat <- ref_table
bias_dat <- create_bias_measures(bias_dat)

bias_dat <- bias_dat[, .(bias = mean(bias)), by = c("origin_name", "year")]

oecd <- c("Australia", "Austria", "Belgium", "Canada", "Denmark", "Finland", 
          "France", "Germany", "Greece", "Iceland", "Ireland", "Italy", 
          "Japan", "Mexico", "Netherlands", "New Zealand", "Norway", "Portugal", 
          "Spain", "Sweden", "Switzerland", "United Kingdom", "United States"
          )

skin <- read.csv("skincolor.csv")
bias_dat <- merge(bias_dat, skin, by.x = "origin_name", by.y = "country")
bias_dat[, oecd := ifelse(origin_name %in% oecd, 1, 0)]
bias_dat[, north := ifelse(medical %in% c("Chinese", "Japanese", "White"), 1, 0)]
bias_dat[, black := ifelse(!(medical %in% c("Black")), 0, 1)]

oecd <- data.table::data.table(bias = bias_dat[oecd == 1, mean(bias), by = year],
                               cat = "OECD")
black <- data.table::data.table(bias = bias_dat[black == 1, mean(bias), by = year],
                                cat = "Black")
south <- data.table::data.table(bias = bias_dat[north == 0, mean(bias), by = year],
                                cat = "Global South")

ggdat <- rbind(oecd, black, south)

setnames(ggdat, c("year", "bias", "race"))
ggdat$year <- factor(as.numeric(ggdat$year), 
                     labels = c("1991-1995", "1996-2000", "2001-2005", "2006-2010"))

tiff("black_bias_over_time_am.tiff", width = 6, height = 6, units = 'in', res = 600)
ggplot(ggdat, aes(x = year, y = bias, group = race)) + 
  geom_line(aes(linetype = race)) + 
  theme_minimal(base_size = 14) +
  ylab("Average Deviation") +
  xlab("Time Period") +
  theme(legend.title = element_blank(),
        text = element_text(family = "Roboto Condensed")) +
  ggtitle("Comparing Deviation Densities")
dev.off()
